Defining the input and output files
# Clean the working environment
rm(list = ls())
knitr::opts_chunk$set(echo = TRUE)
####################################
# Defining the input files
####################################
Selection_strength_test_dir <- Sys.getenv("Selection_strength_test_dir")
Sweep_test_dir <- Sys.getenv("Sweep_test_dir")
###############
## Empirical ###
###############
### ROH hotspots ###
Empirical_data_ROH_hotspots_dir <- Sys.getenv("Empirical_data_ROH_hotspots_dir")
Empirical_data_autosome_ROH_freq_dir <- Sys.getenv("Empirical_data_autosome_ROH_freq_dir")
### Inbreeding coefficient ###
Empirical_data_F_ROH_dir <- Sys.getenv("Empirical_data_F_ROH_dir")
### Expected Heterozygosity distribution ###
Empirical_data_H_e_dir <- Sys.getenv("Empirical_data_H_e_dir")
###############
## Simulated ###
###############
### Causative variant ###
Selection_causative_variant_windows_dir <- Sys.getenv("Selection_causative_variant_windows_dir")
variant_freq_plots_dir <- Sys.getenv("variant_freq_plots_dir")
variant_position_dir <- Sys.getenv("variant_position_dir")
### ROH hotspots ###
Neutral_model_ROH_hotspots_dir <- Sys.getenv("Neutral_model_ROH_hotspots_dir")
Neutral_model_autosome_ROH_freq_dir <- Sys.getenv("Neutral_model_autosome_ROH_freq_dir")
Selection_model_ROH_hotspots_dir <- Sys.getenv("Selection_model_ROH_hotspots_dir")
Selection_model_autosome_ROH_freq_dir <- Sys.getenv("Selection_model_autosome_ROH_freq_dir")
### Inbreeding coefficient ###
Neutral_model_F_ROH_dir <- Sys.getenv("Neutral_model_F_ROH_dir")
Selection_model_F_ROH_dir <- Sys.getenv("Selection_model_F_ROH_dir")
### Expected Heterozygosity distribution ###
Neutral_model_H_e_dir <- Sys.getenv("Neutral_model_H_e_dir")
Selection_model_H_e_dir <- Sys.getenv("Selection_model_H_e_dir")
histogram_line_sizes <- 3
empirical_data_color <- "darkgreen"
neutral_model_color <- "blue"
selection_model_color <- "purple"
# Sys.getenv()
# # Set the working directory for notebook chunks
# knitr::opts_knit$set(root.dir = output_dir_sweep_test)
library(knitr)
## Warning: package 'knitr' was built under R version 4.3.2
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.1
# Function to determine the number of rows until fixation is reached
time_to_fixation <- function(data, threshold = 0.9) {
# Find the index of the first row where the allele frequency is 0.9 or higher
fixation_index <- which(data$allele_frequency >= threshold)[1]
# Return the number of rows until fixation is reached
return(fixation_index - 1)
}
| Selection_coefficient | Avg_Fixation_time | Min_fixation_time | Max_fixation_time | |
|---|---|---|---|---|
| 2 | s01_chr3 | 33.2 | 28 | 36 |
| 1 | s005_chr3 | 33.0 | 29 | 39 |
| 3 | s02_chr3 | 32.6 | 29 | 37 |
| 4 | s03_chr3 | 27.2 | 23 | 30 |
| 6 | s05_chr3 | 20.8 | 15 | 30 |
| 5 | s04_chr3 | 20.4 | 14 | 31 |
| 7 | s06_chr3 | 12.0 | 10 | 14 |
| 8 | s07_chr3 | 10.8 | 9 | 14 |
| 9 | s08_chr3 | 7.4 | 6 | 9 |
| Selection_coefficient | Avg_Length_Mb | Min_freq | Max_freq | Avg_window_freq | Avg_freq_variant_100k_window | |
|---|---|---|---|---|---|---|
| 4 | s03_chr3 | 3.94 | 0.48 | 1.00 | 0.8389348 | 0.840 |
| 2 | s01_chr3 | 5.64 | 0.28 | 0.96 | 0.6922190 | 0.676 |
| 7 | s06_chr3 | 5.92 | 0.38 | 1.00 | 0.8641287 | 0.872 |
| 6 | s05_chr3 | 6.02 | 0.56 | 1.00 | 0.8995497 | 0.912 |
| 3 | s02_chr3 | 6.06 | 0.40 | 0.96 | 0.7634277 | 0.776 |
| 9 | s08_chr3 | 6.58 | 0.32 | 1.00 | 0.7996209 | 0.804 |
| 5 | s04_chr3 | 6.72 | 0.54 | 1.00 | 0.8163600 | 0.820 |
| 1 | s005_chr3 | 10.44 | 0.40 | 0.98 | 0.7454841 | 0.656 |
| 8 | s07_chr3 | 23.44 | 0.14 | 1.00 | 0.8795011 | 0.844 |
Confidence level <=> konfidensgrad
# Function to calculate bootstrap confidence intervals
bootstrap_confidence_interval_fun <- function(observed_data, n_bootstraps = 1000, confidence_level = 0.95) {
# Function to calculate the mean for each bootstrap sample
compute_bootstrap_mean_fun <- function(observed_data_urn) {
bootstrap_dataset <- sample(observed_data_urn, replace = TRUE)
bootstrap_estimate <- mean(bootstrap_dataset)
return(bootstrap_estimate)
}
# Perform bootstrap resampling
bootstrap_point_estimates <- replicate(n_bootstraps, compute_bootstrap_mean_fun(observed_data))
# Calculate the percentiles for the confidence interval
alpha <- (1 - confidence_level) / 2
confidence_interval_lower_bound <- quantile(bootstrap_point_estimates, alpha) # 2.5th percentile
confidence_interval_upper_bound <- quantile(bootstrap_point_estimates, 1 - alpha) # 97.5th percentile
# Return the confidence interval
return(c(confidence_interval_lower_bound, confidence_interval_upper_bound))
}
## ROH-hotspot selection testing results://n
| Model | Avg_ROH_hotspot_threshold |
|---|---|
| Neutral | 0.416 |
| selection_model_s02_chr3 | 0.452 |
| selection_model_s03_chr3 | 0.456 |
| selection_model_s005_chr3 | 0.476 |
| selection_model_s01_chr3 | 0.496 |
| selection_model_s05_chr3 | 0.496 |
| selection_model_s04_chr3 | 0.532 |
| selection_model_s08_chr3 | 0.564 |
| selection_model_s07_chr3 | 0.568 |
| selection_model_s06_chr3 | 0.600 |
| Empirical | 0.750 |
## Overall Average Avg_F_ROH for german_shepherd : 0.2750777 //n
## Point estimate F_ROH across all 20 simulations: 0.3411836 //n
## [1] "Bootstrap 95% Confidence Interval: [0.299992136, 0.38932096]"
## Point estimate F_ROH across all 20 simulations for selection_model_s005_chr3 : 0.3851778 //n[1] "Bootstrap 95% Confidence Interval: [0.36704765, 0.40664288]"
## Point estimate F_ROH across all 20 simulations for selection_model_s01_chr3 : 0.3901095 //n[1] "Bootstrap 95% Confidence Interval: [0.32800948, 0.452287203]"
## Point estimate F_ROH across all 20 simulations for selection_model_s02_chr3 : 0.389057 //n[1] "Bootstrap 95% Confidence Interval: [0.34961284, 0.41288596]"
## Point estimate F_ROH across all 20 simulations for selection_model_s03_chr3 : 0.3750411 //n[1] "Bootstrap 95% Confidence Interval: [0.34084448, 0.421364716]"
## Point estimate F_ROH across all 20 simulations for selection_model_s04_chr3 : 0.4344734 //n[1] "Bootstrap 95% Confidence Interval: [0.39021996, 0.47333448]"
## Point estimate F_ROH across all 20 simulations for selection_model_s05_chr3 : 0.4034206 //n[1] "Bootstrap 95% Confidence Interval: [0.36243468, 0.44244412]"
## Point estimate F_ROH across all 20 simulations for selection_model_s06_chr3 : 0.485658 //n[1] "Bootstrap 95% Confidence Interval: [0.41431812, 0.55071608]"
## Point estimate F_ROH across all 20 simulations for selection_model_s07_chr3 : 0.4494377 //n[1] "Bootstrap 95% Confidence Interval: [0.426247766, 0.483897177]"
## Point estimate F_ROH across all 20 simulations for selection_model_s08_chr3 : 0.4411038 //n[1] "Bootstrap 95% Confidence Interval: [0.382334925, 0.49404564]"
| Model | F_ROH | Lower_CI | Upper_CI |
|---|---|---|---|
| Empirical | 0.27508 | NA | NA |
| Neutral | 0.34118 | 0.29999 | 0.38932 |
| s03 | 0.37504 | 0.34084 | 0.42136 |
| s005 | 0.38518 | 0.36705 | 0.40664 |
| s02 | 0.38906 | 0.34961 | 0.41289 |
| s01 | 0.39011 | 0.32801 | 0.45229 |
| s05 | 0.40342 | 0.36243 | 0.44244 |
| s04 | 0.43447 | 0.39022 | 0.47333 |
| s08 | 0.44110 | 0.38233 | 0.49405 |
| s07 | 0.44944 | 0.42625 | 0.48390 |
| s06 | 0.48566 | 0.41432 | 0.55072 |
## Models where empirical F_ROH is within CI:
## character(0)
##
## Models where empirical F_ROH is outside CI:
## [1] "s005" "s01" "s02" "s03" "s04" "s05" "s06"
## [8] "s07" "s08" "Neutral"
| Model | H_e_5th_percentile | Lower_CI | Upper_CI |
|---|---|---|---|
| s08 | 0.12414 | 0.10877 | 0.13782 |
| s01 | 0.12632 | 0.11628 | 0.13635 |
| s005 | 0.12664 | 0.11628 | 0.13700 |
| s02 | 0.13061 | 0.11628 | 0.14753 |
| Neutral | 0.13338 | 0.12955 | 0.14040 |
| s07 | 0.13473 | 0.11248 | 0.15188 |
| s04 | 0.13676 | 0.11968 | 0.15384 |
| s06 | 0.13681 | 0.12316 | 0.15346 |
| s03 | 0.13703 | 0.12080 | 0.16008 |
| s05 | 0.13928 | 0.13360 | 0.14547 |
| Empirical | NA | NA | NA |
##
## ROH-hotspot threshold comparison between the different datasets
| Model | Avg_ROH_hotspot_threshold |
|---|---|
| Neutral | 0.416 |
| selection_model_s02_chr3 | 0.452 |
| selection_model_s03_chr3 | 0.456 |
| selection_model_s005_chr3 | 0.476 |
| selection_model_s01_chr3 | 0.496 |
| selection_model_s05_chr3 | 0.496 |
| selection_model_s04_chr3 | 0.532 |
| selection_model_s08_chr3 | 0.564 |
| selection_model_s07_chr3 | 0.568 |
| selection_model_s06_chr3 | 0.600 |
| Empirical | 0.750 |
| Model | F_ROH | Lower_CI | Upper_CI |
|---|---|---|---|
| Empirical | 0.27508 | NA | NA |
| Neutral | 0.34118 | 0.29999 | 0.38932 |
| s03 | 0.37504 | 0.34084 | 0.42136 |
| s005 | 0.38518 | 0.36705 | 0.40664 |
| s02 | 0.38906 | 0.34961 | 0.41289 |
| s01 | 0.39011 | 0.32801 | 0.45229 |
| s05 | 0.40342 | 0.36243 | 0.44244 |
| s04 | 0.43447 | 0.39022 | 0.47333 |
| s08 | 0.44110 | 0.38233 | 0.49405 |
| s07 | 0.44944 | 0.42625 | 0.48390 |
| s06 | 0.48566 | 0.41432 | 0.55072 |
## [1] "Selection test results"
## [1] "ROH-hotspot windows with an mean H_e Value lower or equal to the fifth percentile of the neutral models average H_e are classified as being under selection"
## [1] "5th percentile of the neutral model is: 0.12912"
| Name | Under_selection | Window_based_Average_H_e |
|---|---|---|
| Hotspot_chr3_window_1 | Yes | 0.1082216 |
| Hotspot_chr3_window_3 | Yes | 0.1230482 |
| Hotspot_chr3_window_2 | No | 0.1445314 |
| Hotspot_chr17_window_1 | No | 0.1449621 |
| Hotspot_chr17_window_2 | No | 0.1486567 |
| Hotspot_chr19_window_1 | No | 0.1603723 |
## [1] "ROH-hotspots under selection:"
| Name | Under_selection | Window_based_Average_H_e |
|---|---|---|
| Hotspot_chr3_window_1 | Yes | 0.1082216 |
| Hotspot_chr3_window_3 | Yes | 0.1230482 |
## Models where empirical H_e is within CI for Hotspot_chr3_window_1 :
## character(0)
##
## Models where empirical H_e is outside CI for Hotspot_chr3_window_1 :
## [1] "s005" "s01" "s02" "s03" "s04" "s05" "s06"
## [8] "s07" "s08" "Neutral"
## Models where empirical H_e is within CI for Hotspot_chr3_window_3 :
## [1] "s005" "s01" "s02" "s03" "s04" "s07" "s08"
##
## Models where empirical H_e is outside CI for Hotspot_chr3_window_3 :
## [1] "s05" "s06" "Neutral"
## Models where empirical H_e is within CI for Hotspot_chr3_window_2 :
## [1] "s02" "s03" "s04" "s05" "s06" "s07"
##
## Models where empirical H_e is outside CI for Hotspot_chr3_window_2 :
## [1] "s005" "s01" "s08" "Neutral"
## Models where empirical H_e is within CI for Hotspot_chr17_window_1 :
## [1] "s02" "s03" "s04" "s05" "s06" "s07"
##
## Models where empirical H_e is outside CI for Hotspot_chr17_window_1 :
## [1] "s005" "s01" "s08" "Neutral"
## Models where empirical H_e is within CI for Hotspot_chr17_window_2 :
## [1] "s03" "s04" "s06" "s07"
##
## Models where empirical H_e is outside CI for Hotspot_chr17_window_2 :
## [1] "s005" "s01" "s02" "s05" "s08" "Neutral"
## Models where empirical H_e is within CI for Hotspot_chr19_window_1 :
## character(0)
##
## Models where empirical H_e is outside CI for Hotspot_chr19_window_1 :
## [1] "s005" "s01" "s02" "s03" "s04" "s05" "s06"
## [8] "s07" "s08" "Neutral"
| Selection_Coefficient | H_e_Threshold |
|---|---|
| s005 | 0.11280 |
| s01 | 0.11280 |
| s02 | 0.11280 |
| s03 | 0.11454 |
| s04 | 0.11280 |
| s06 | 0.11280 |
| s07 | 0.09500 |
| s08 | 0.10028 |
##
## Hotspot Name: Hotspot_chr3_window_1
##
## Hotspot Name: Hotspot_chr3_window_3